home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- import xml.dom.minidom as dom
- import cgi
-
- default_styles = {
- 'Comment' : 'def:comment',
- 'String' : 'def:string',
- 'Preprocessor' : 'def:preprocessor',
- 'Keyword' : 'def:keyword',
- 'Data Type' : 'def:type',
- 'Decimal' : 'def:decimal',
- 'Specials' : 'def:specials',
- 'Function' : 'def:function',
- 'Base-N Integer' : 'def:base-n-integer',
- 'Floating Point' : 'def:floating-point',
- 'Floating point' : 'def:floating-point',
- 'Others' : None,
- 'Other' : None,
- 'Others 2' : None,
- 'Others 3' : None,
- }
-
- def escape_escape_char(ch):
- if ch == '\\':
- return '\\\\'
- elif ch in ['@']:
- return ch
- raise RuntimeError("don't know how to escape '%s'" % (ch,))
-
- def escape_regex(s):
- return cgi.escape(s)
-
- def normalize_id(id):
- if id == "C#":
- return "c-sharp"
- elif id == ".desktop":
- return "desktop"
- elif id == ".ini":
- return "ini"
- elif id == "C++ Line Comment":
- return "cpp-line-comment"
- elif id == "Markup (inline)":
- return "markup-inline"
- elif id == "Markup (block)":
- return "markup-block"
- else:
- return id.replace(', ', '-').replace('.', '-').replace('*', '-').replace(',', '-').replace(' ', '-').replace('/', '-').replace('#', '-').lower()
-
- class LangFile(object):
- def __init__(self, id, name, _name, section, _section, mimetypes, globs, filename):
- object.__init__(self)
-
- assert name or _name
- assert section or _section
-
- self.id = normalize_id(id or name or _name)
- self.name = name
- self._name = _name
- self.section = section
- self._section = _section
- self.mimetypes = mimetypes
- self.globs = globs
- self.filename = filename
- self.contexts = []
- self.escape_char = None
-
- def set_esc_char(self, char):
- self.escape_char = char
-
- def add_context(self, ctx):
- self.contexts.append(ctx)
-
- def format_header(self, indent):
- string = '<?xml version="1.0" encoding="UTF-8"?>\n<language id="%s"' % (self.id,)
-
- if self.name:
- string += ' name="%s"' % (self.name,)
- else:
- string += ' _name="%s"' % (self._name,)
-
- string += ' version="2.0"'
-
- if self.section:
- string += ' section="%s"' % (self.section,)
- else:
- string += ' _section="%s"' % (self._section,)
-
- string += '>\n'
-
- if self.mimetypes or self.globs:
- string += indent + '<metadata>\n'
- if self.mimetypes:
- string += 2*indent + '<property name="mimetypes">%s</property>\n' % (cgi.escape(self.mimetypes),)
- if self.globs:
- string += 2*indent + '<property name="globs">%s</property>\n' % (cgi.escape(self.globs),)
- string += indent + '</metadata>\n\n'
-
- return string
-
- def format_footer(self, indent):
- return '</language>\n'
-
- def format_styles(self, indent):
- string = indent + "<styles>\n"
- styles = {}
- for ctx in self.contexts:
- map_to = default_styles[ctx.style_name]
- styles[ctx.style] = [ctx.style_name, map_to]
- for s in styles:
- id = s
- name, map_to = styles[s]
- if map_to:
- string += indent*2 + '<style id="%s" _name="%s" map-to="%s"/>\n' % (id, name, map_to)
- else:
- string += indent*2 + '<style id="%s" _name="%s"/>\n' % (id, name)
- string += indent + "</styles>\n\n"
- return string
-
- def format_contexts(self, indent):
- string = indent + '<definitions>\n'
-
- if self.escape_char and self.escape_char != '\\':
- char = escape_escape_char(self.escape_char)
-
- string += indent*2 + '<context id="generated-escape">\n'
- string += indent*3 + '<match>%s.</match>\n' % (char,)
- string += indent*2 + '</context>\n'
-
- string += indent*2 + '<context id="generated-line-escape">\n'
- string += indent*3 + '<start>%s$</start>\n' % (char,)
- string += indent*3 + '<end>^</end>\n'
- string += indent*2 + '</context>\n'
-
- for ctx in self.contexts:
- if self.escape_char:
- if self.escape_char != '\\':
- esc_ctx = 'generated-escape'
- line_esc_ctx = 'generated-line-escape'
- else:
- esc_ctx = 'def:escape'
- line_esc_ctx = 'def:line-continue'
- else:
- esc_ctx = None
- line_esc_ctx = None
-
- string += ctx.format(indent, esc_ctx, line_esc_ctx)
-
- string += indent*2 + '<context id="%s">\n' % (self.id,)
- string += indent*3 + '<include>\n'
- for ctx in self.contexts:
- string += indent*4 + '<context ref="%s"/>\n' % (ctx.id,)
- string += indent*3 + '</include>\n'
- string += indent*2 + '</context>\n'
-
- string += indent + '</definitions>\n'
- return string
-
- def format(self, indent=' '):
- string = self.format_header(indent)
- string += self.format_styles(indent)
- string += self.format_contexts(indent)
- string += self.format_footer(indent)
- return string
-
- class Context(object):
- def __init__(self, name, _name, style):
- object.__init__(self)
- assert (name or _name) and style
- self.name = name
- self._name = _name
- self.style_name = style
- self.style = style.replace(' ', '-').lower()
- self.id = normalize_id(name or _name)
- self.is_container = False
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- print "Implement me: %s.format()" % (type(self).__name__,)
- return indent*2 + '<context id="%s"/>\n' % (self.id)
-
- def format_escape(self, indent, esc_ctx, line_esc_ctx):
- string = ""
- if self.is_container and esc_ctx is not None:
- string += indent*3 + '<include>\n'
- string += indent*4 + '<context ref="%s"/>\n' % (esc_ctx,)
- string += indent*4 + '<context ref="%s"/>\n' % (line_esc_ctx,)
- string += indent*3 + '</include>\n'
- return string
-
- class KeywordList(Context):
- def __init__(self, name, _name, style, keywords, case_sensitive,
- match_empty_string_at_beginning,
- match_empty_string_at_end,
- beginning_regex, end_regex):
- Context.__init__(self, name, _name, style)
- self.keywords = keywords
- self.case_sensitive = case_sensitive # ???
- self.match_empty_string_at_beginning = match_empty_string_at_beginning
- self.match_empty_string_at_end = match_empty_string_at_end
- self.beginning_regex = beginning_regex
- self.end_regex = end_regex
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- string = indent*2 + '<context id="%s" style-ref="%s">\n' % (self.id, self.style)
-
- if self.beginning_regex:
- string += indent*3 + '<prefix>%s</prefix>\n' % (escape_regex(self.beginning_regex),)
- elif not self.match_empty_string_at_beginning:
- string += indent*3 + '<prefix></prefix>\n'
-
- if self.end_regex:
- string += indent*3 + '<suffix>%s</suffix>\n' % (escape_regex(self.end_regex),)
- elif not self.match_empty_string_at_end:
- string += indent*3 + '<suffix></suffix>\n'
-
- for kw in self.keywords:
- string += indent*3 + '<keyword>%s</keyword>\n' % (escape_regex(kw),)
-
- string += self.format_escape(indent, esc_ctx, line_esc_ctx)
- string += indent*2 + '</context>\n'
- return string
-
- class PatternItem(Context):
- def __init__(self, name, _name, style, pattern):
- Context.__init__(self, name, _name, style)
- assert pattern
- self.pattern = pattern
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- string = indent*2 + '<context id="%s" style-ref="%s">\n' % (self.id, self.style)
- string += indent*3 + '<match>%s</match>\n' % (escape_regex(self.pattern),)
- string += self.format_escape(indent, esc_ctx, line_esc_ctx)
- string += indent*2 + '</context>\n'
- return string
-
- class LineComment(Context):
- def __init__(self, name, _name, style, start):
- Context.__init__(self, name, _name, style)
- assert start
- self.start = start
- self.is_container = True
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- string = indent*2 + '<context id="%s" style-ref="%s" end-at-line-end="true">\n' % (self.id, self.style)
- string += indent*3 + '<start>%s</start>\n' % (escape_regex(self.start),)
- string += self.format_escape(indent, esc_ctx, line_esc_ctx)
- string += indent*2 + '</context>\n'
- return string
-
- class BlockComment(Context):
- def __init__(self, name, _name, style, start, end):
- Context.__init__(self, name, _name, style)
- assert start and end
- self.start = start
- self.end = end
- self.is_container = True
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- string = indent*2 + '<context id="%s" style-ref="%s">\n' % (self.id, self.style)
- string += indent*3 + '<start>%s</start>\n' % (escape_regex(self.start),)
- string += indent*3 + '<end>%s</end>\n' % (escape_regex(self.end),)
- string += self.format_escape(indent, esc_ctx, line_esc_ctx)
- string += indent*2 + '</context>\n'
- return string
-
- class String(Context):
- def __init__(self, name, _name, style, start, end, end_at_line_end):
- Context.__init__(self, name, _name, style)
- assert start and end
- self.start = start
- if end and end.endswith("\\n"):
- end = end[:-2]
- end_at_line_end = True
- self.end = end
- self.end_at_line_end = end_at_line_end
- self.is_container = True
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- string = indent*2 + '<context id="%s" style-ref="%s"' % (self.id, self.style)
- if self.end_at_line_end:
- string += ' end-at-line-end="true"'
- string += '>\n'
-
- if self.start:
- string += indent*3 + '<start>%s</start>\n' % (escape_regex(self.start),)
- if self.end:
- string += indent*3 + '<end>%s</end>\n' % (escape_regex(self.end),)
-
- string += self.format_escape(indent, esc_ctx, line_esc_ctx)
- string += indent*2 + '</context>\n'
- return string
-
- class SyntaxItem(Context):
- def __init__(self, name, _name, style, start, end):
- Context.__init__(self, name, _name, style)
- assert start and end
- self.start = start
- self.end = end
- self.end_at_line_end = False
- if end and end.endswith("\\n"):
- self.end = end[:-2]
- self.end_at_line_end = True
- self.is_container = True
-
- def format(self, indent, esc_ctx, line_esc_ctx):
- string = indent*2 + '<context id="%s" style-ref="%s"' % (self.id, self.style)
- if self.end_at_line_end:
- string += ' end-at-line-end="true"'
- string += '>\n'
-
- if self.start:
- string += indent*3 + '<start>%s</start>\n' % (escape_regex(self.start),)
- if self.end:
- string += indent*3 + '<end>%s</end>\n' % (escape_regex(self.end),)
-
- string += self.format_escape(indent, esc_ctx, line_esc_ctx)
- string += indent*2 + '</context>\n'
- return string
-
- def first_child(node):
- child = node.firstChild
- while child is not None and child.nodeType != dom.Node.ELEMENT_NODE:
- child = child.nextSibling
- return child
- def next_sibling(node):
- next = node.nextSibling
- while next is not None and next.nodeType != dom.Node.ELEMENT_NODE:
- next = next.nextSibling
- return next
-
- def parseLineComment(cur, name, _name, style):
- child = first_child(cur)
- assert child is not None and child.tagName == "start-regex"
- return LineComment(name, _name, style, child.firstChild.nodeValue)
-
- def parseBlockComment(cur, name, _name, style):
- start_regex = None
- end_regex = None
- child = first_child(cur)
-
- while child is not None:
- if child.tagName == "start-regex":
- start_regex = child.firstChild.nodeValue
- elif child.tagName == "end-regex":
- end_regex = child.firstChild.nodeValue
- child = next_sibling(child)
-
- assert start_regex is not None
- assert end_regex is not None
-
- return BlockComment(name, _name, style, start_regex, end_regex)
-
- def parseString(cur, name, _name, style):
- start_regex = None
- end_regex = None
- end_at_line_end = True
-
- prop = cur.getAttribute("end-at-line-end")
- if prop:
- if prop in ["TRUE", "1"]:
- end_at_line_end = True
- else:
- end_at_line_end = False
-
- child = first_child(cur)
-
- while child is not None:
- if child.tagName == "start-regex":
- start_regex = child.firstChild.nodeValue
- elif child.tagName == "end-regex":
- end_regex = child.firstChild.nodeValue
- child = next_sibling(child)
-
- assert start_regex is not None
- assert end_regex is not None
-
- return String(name, _name, style, start_regex, end_regex, end_at_line_end)
-
- def parseKeywordList(cur, name, _name, style):
- case_sensitive = True
- match_empty_string_at_beginning = True
- match_empty_string_at_end = True
- beginning_regex = None
- end_regex = None
- keywords = []
-
- prop = cur.getAttribute("case-sensitive")
- if prop:
- if prop in ["TRUE", "1"]:
- case_sensitive = True
- else:
- case_sensitive = False
-
- prop = cur.getAttribute("match-empty-string-at-beginning")
- if prop:
- if prop in ["TRUE", "1"]:
- match_empty_string_at_beginning = True
- else:
- match_empty_string_at_beginning = False
-
- prop = cur.getAttribute("match-empty-string-at-end")
- if prop:
- if prop in ["TRUE", "1"]:
- match_empty_string_at_end = True
- else:
- match_empty_string_at_end = False
-
- prop = cur.getAttribute("beginning-regex")
- if prop:
- beginning_regex = prop
-
- prop = cur.getAttribute("end-regex")
- if prop:
- end_regex = prop
-
- child = first_child(cur)
-
- while child is not None:
- if child.tagName == "keyword":
- keywords.append(child.firstChild.nodeValue)
- child = next_sibling(child)
-
- assert keywords
-
- return KeywordList(name, _name, style, keywords, case_sensitive,
- match_empty_string_at_beginning,
- match_empty_string_at_end,
- beginning_regex, end_regex)
-
- def parsePatternItem(cur, name, _name, style):
- child = first_child(cur)
- assert child is not None and child.tagName == "regex"
- return PatternItem(name, _name, style, child.firstChild.nodeValue)
-
- def parseSyntaxItem(cur, name, _name, style):
- start_regex = None
- end_regex = None
-
- child = first_child(cur)
-
- while child is not None:
- if child.tagName == "start-regex":
- start_regex = child.firstChild.nodeValue
- elif child.tagName == "end-regex":
- end_regex = child.firstChild.nodeValue
- child = next_sibling(child)
-
- assert start_regex is not None
- assert end_regex is not None
-
- return SyntaxItem(name, _name, style, start_regex, end_regex)
-
- def parseTag(cur):
- _name = None
- name = None
-
- _name = cur.getAttribute("_name")
- name = cur.getAttribute("name")
- assert name or _name
- style = cur.getAttribute("style") or "Normal"
-
- if cur.tagName == "line-comment":
- ctx = parseLineComment(cur, name, _name, style)
- elif cur.tagName == "block-comment":
- ctx = parseBlockComment(cur, name, _name, style)
- elif cur.tagName == "string":
- ctx = parseString(cur, name, _name, style)
- elif cur.tagName == "keyword-list":
- ctx = parseKeywordList(cur, name, _name, style)
- elif cur.tagName == "pattern-item":
- ctx = parsePatternItem(cur, name, _name, style)
- elif cur.tagName == "syntax-item":
- ctx = parseSyntaxItem(cur, name, _name, style)
- else:
- print "Unknown tag: %s" % (cur.tagName,)
- ctx = None
-
- return ctx
-
- def parse_file(filename):
- doc = dom.parse(filename)
- node = doc.documentElement
- contexts = []
- esc_char = None
-
- assert node.tagName == "language"
-
- lang_file = LangFile(node.getAttribute("id"),
- node.getAttribute("name"),
- node.getAttribute("_name"),
- node.getAttribute("section"),
- node.getAttribute("_section"),
- node.getAttribute("mimetypes"),
- node.getAttribute("globs"),
- filename)
-
- node = first_child(node)
- assert node is not None
-
- while node is not None:
- if node.tagName == "escape-char":
- lang_file.set_esc_char(node.firstChild.nodeValue)
- else:
- lang_file.add_context(parseTag(node))
- node = next_sibling(node)
-
- return lang_file
-
- if __name__ == '__main__':
- import sys
-
- if not sys.argv[1:]:
- print "usage: %s LANG_FILE" % (sys.argv[0])
- sys.exit(1)
-
- lang_file = parse_file(sys.argv[1])
- sys.stdout.write(lang_file.format())
-